{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from src.xVerify.model import Model\n",
    "from src.xVerify.eval import Evaluator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Demo: judge a single LLM response against its reference answer.\n",
    "\n",
    "# Judge-model settings\n",
    "model_name = 'xVerify-0.5B-I'  # name of the judge model\n",
    "url = 'https://your-anonymized-url/v1'  # placeholder: model path or URL\n",
    "inference_mode = 'api'  # either 'local' or 'api'\n",
    "api_key = None  # key for API access; leave as None when there is none\n",
    "\n",
    "# Build the model wrapper, then the evaluator that drives it.\n",
    "model = Model(\n",
    "    model_name=model_name, model_path_or_url=url,\n",
    "    inference_mode=inference_mode, api_key=api_key,\n",
    ")\n",
    "evaluator = Evaluator(model=model)\n",
    "\n",
    "# The sample to judge: the question, the LLM's raw output, and the reference answer.\n",
    "question = (\n",
    "    \"New steel giant includes Lackawanna site A major change is coming to the global steel industry and a galvanized mill in Lackawanna that formerly belonged to Bethlehem Steel Corp.\\n\"\n",
    "    \"Classify the topic of the above sentence as World, Sports, Business, or Sci/Tech.\"\n",
    ")\n",
    "llm_output = \"The answer is Business.\"\n",
    "correct_answer = \"Business\"\n",
    "\n",
    "# Run the judgement on this one sample and print whatever comes back.\n",
    "result = evaluator.single_evaluate(\n",
    "    question=question, llm_output=llm_output, correct_answer=correct_answer,\n",
    ")\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Batch evaluation test: evaluate a dataset file with a locally loaded model.\n",
    "\n",
    "# initialization\n",
    "model_name = 'xVerify-0.5B-I'  # Model name\n",
    "path = 'IAAR-Shanghai/xVerify-0.5B-I'  # Model path used for local inference\n",
    "inference_mode = 'local'  # Inference mode, 'local' or 'api'\n",
    "model = Model(\n",
    "    model_name=model_name,\n",
    "    # Pass the local `path` defined above, not the API `url` from the single-sample cell.\n",
    "    model_path_or_url=path,\n",
    "    inference_mode=inference_mode,\n",
    "    # No API key is supplied in local mode; passing None directly also keeps this cell\n",
    "    # runnable without first executing the single-sample cell.\n",
    "    # NOTE(review): assumes Model ignores api_key when inference_mode='local' -- confirm.\n",
    "    api_key=None\n",
    ")\n",
    "evaluator = Evaluator(model=model)\n",
    "\n",
    "# set batch evaluation data file, and conduct evaluation.\n",
    "data_path = '/path/to/your/data/example.json' # Input the path of the dataset to be evaluated\n",
    "results = evaluator.evaluate(\n",
    "    data_path=data_path,\n",
    "    data_size=10,  # Set the number of evaluation samples\n",
    "    output_path='/path/to/save/results'  # Placeholder: replace with where results should be saved\n",
    ")\n",
    "\n",
    "print(results)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
